{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 1. 安装工具包\n",
    "在命令提示行输入：\n",
    "> pip install efficient-apriori"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from efficient_apriori import apriori"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "transactions = [('eggs', 'bacon', 'soup'),\n",
    "                ('eggs', 'bacon', 'apple'),\n",
    "                ('soup', 'bacon', 'banana')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "itemsets, rules = apriori(transactions, min_support=0.5,  min_confidence=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{eggs} -> {bacon}, {soup} -> {bacon}]\n"
     ]
    }
   ],
   "source": [
    "print(rules)  # [{eggs} -> {bacon}, {soup} -> {bacon}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{1: {('bacon',): 3, ('eggs',): 2, ('soup',): 2}, 2: {('bacon', 'eggs'): 2, ('bacon', 'soup'): 2}}\n"
     ]
    }
   ],
   "source": [
    "print(itemsets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print out every rule with 2 items on the left hand side,\n",
    "# 1 item on the right hand side, sorted by lift\n",
    "rules_rhs = filter(lambda rule: len(rule.lhs) == 1 and len(rule.rhs) == 1, rules)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{eggs} -> {bacon}, {soup} -> {bacon}]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(rules_rhs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_decimals', '_pf', 'confidence', 'conviction', 'count_full', 'count_lhs', 'count_rhs', 'lhs', 'lift', 'num_transactions', 'rhs', 'support']\n"
     ]
    }
   ],
   "source": [
    "print(dir(rules[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{eggs} -> {bacon}, {soup} -> {bacon}]\n"
     ]
    }
   ],
   "source": [
    "rules_rhs = filter(lambda rule: len(rule.lhs) == 1 and len(rule.rhs) == 1, rules)\n",
    "result = sorted(rules_rhs, key=lambda rule: rule.lift)\n",
    "print(list(result))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{eggs} -> {bacon} (conf: 1.000, supp: 0.667, lift: 1.000, conv: 0.000)\n",
      "{soup} -> {bacon} (conf: 1.000, supp: 0.667, lift: 1.000, conv: 0.000)\n"
     ]
    }
   ],
   "source": [
    "for rule in result:\n",
    "    print(rule) # Prints the rule and its confidence, support, lift, ..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Lift(A→B) = (Confidence (A→B))/(Support (B))  \n",
    "refers to the increase in the ratio of sale of B when A is sold  \n",
    "P(A,B)/(P(A)P(B))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. PPT中的例子"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from efficient_apriori import apriori\n",
    "transactions = [('A', 'C', 'D'),\n",
    "                ('B', 'C', 'E'),\n",
    "                ('A', 'B', 'C', 'E'),\n",
    "                ('B', 'E')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "itemsets, rules = apriori(transactions, min_support=0.5,  min_confidence=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: {('A',): 2, ('B',): 3, ('C',): 3, ('E',): 3},\n",
       " 2: {('A', 'C'): 2, ('B', 'C'): 2, ('B', 'E'): 3, ('C', 'E'): 2},\n",
       " 3: {('B', 'C', 'E'): 2}}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "itemsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{A} -> {C}, {E} -> {B}, {B} -> {E}, {C, E} -> {B}, {B, C} -> {E}]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. 大数据关联规则\n",
    "\n",
    "如果要分析的数据较大，无法直接导入内存，可以传递一个返回生成器的函数，而不是交易的列表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def data_generator(filename):\n",
    "    \"\"\"\n",
    "    Data generator, needs to return a generator to be called several times.\n",
    "    \"\"\"\n",
    "    def data_gen():\n",
    "        with open(filename) as file:\n",
    "            for line in file:\n",
    "                yield tuple(k.strip() for k in line.split(','))\n",
    "    return data_gen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# file_path = \"https://github.com/seratch/apriori.js/blob/master/dataset.csv\"\n",
    "transactions = data_generator(\"dataset.csv\")\n",
    "itemsets, rules = apriori(transactions, min_support=0.5,  min_confidence=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: {('A',): 2, ('B',): 3, ('C',): 3, ('E',): 3},\n",
       " 2: {('A', 'C'): 2, ('B', 'C'): 2, ('B', 'E'): 3, ('C', 'E'): 2},\n",
       " 3: {('B', 'C', 'E'): 2}}"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "itemsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{A} -> {C}, {E} -> {B}, {B} -> {E}, {C, E} -> {B}, {B, C} -> {E}]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "transactions_2 = data_generator(\"store_data.csv\")\n",
    "itemsets_2, rules_2 = apriori(transactions_2, min_support=0.0045,  min_confidence=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{A} -> {C}, {E} -> {B}, {B} -> {E}, {C, E} -> {B}, {B, C} -> {E}]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{A} -> {C} (conf: 1.000, supp: 0.500, lift: 1.333, conv: 250000000.000)\n",
      "{E} -> {B} (conf: 1.000, supp: 0.750, lift: 1.333, conv: 250000000.000)\n",
      "{B} -> {E} (conf: 1.000, supp: 0.750, lift: 1.333, conv: 250000000.000)\n",
      "{C, E} -> {B} (conf: 1.000, supp: 0.500, lift: 1.333, conv: 250000000.000)\n",
      "{B, C} -> {E} (conf: 1.000, supp: 0.500, lift: 1.333, conv: 250000000.000)\n"
     ]
    }
   ],
   "source": [
    "for rule in rules[:10]:\n",
    "    print(rule)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
